Exercise 1

library(tidyverse)
library(insuranceData)
library(plotly)

# Load the dataCar data set from the insuranceData package into the workspace.
data("dataCar", package = "insuranceData")

# Inspect the structure: column names, types and the first few values.
str(dataCar)
## 'data.frame':    67856 obs. of  11 variables:
##  $ veh_value: num  1.06 1.03 3.26 4.14 0.72 2.01 1.6 1.47 0.52 0.38 ...
##  $ exposure : num  0.304 0.649 0.569 0.318 0.649 ...
##  $ clm      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ numclaims: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ claimcst0: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ veh_body : Factor w/ 13 levels "BUS","CONVT",..: 4 4 13 11 4 5 8 4 4 4 ...
##  $ veh_age  : int  3 2 2 2 4 3 3 2 4 4 ...
##  $ gender   : Factor w/ 2 levels "F","M": 1 1 1 1 1 2 2 2 1 1 ...
##  $ area     : Factor w/ 6 levels "A","B","C","D",..: 3 1 5 4 3 3 1 2 1 2 ...
##  $ agecat   : int  2 4 2 2 2 4 4 6 3 4 ...
##  $ X_OBSTAT_: Factor w/ 1 level "01101    0    0    0": 1 1 1 1 1 1 1 1 1 1 ...

# Print the first rows of the data.
head(dataCar)

a)

  # Bar charts showing the number of rows per level of three discrete
  # variables: number of claims, age category and gender.
  ggplot(data = dataCar, mapping = aes(x = numclaims)) +
    geom_bar(fill = "navyblue")

  ggplot(data = dataCar, mapping = aes(x = agecat)) +
    geom_bar(fill = "navyblue")

  ggplot(data = dataCar, mapping = aes(x = gender)) +
    geom_bar(fill = "navyblue")

b)

# Histogram of vehicle value with every value above 7 capped at 7, so the
# long right tail (the maximum is 34.56) collapses into the last bin instead
# of stretching the x axis.
dataCar %>%
  # pmin() is the idiomatic cap; unlike the arithmetic form
  # (x > 7) * 7 + x * (x <= 7) it does not yield NaN for infinite values.
  mutate(veh_value = pmin(veh_value, 7)) %>%
  ggplot(mapping = aes(x = veh_value)) +
  # 30 bins is the geom_histogram() default; stating it explicitly keeps the
  # picture unchanged and silences the "pick better value" message.
  geom_histogram(bins = 30, fill = "navyblue")

c)

# Average number of claims per row within each age category, drawn as one
# point per category.
dataCar %>%
  group_by(agecat) %>%
  summarize(avg_claims = mean(numclaims)) %>%
  ggplot(mapping = aes(x = agecat, y = avg_claims)) +
  # The default point shape ignores `fill`; `colour` is the aesthetic that
  # actually paints the points navy.
  geom_point(colour = "navy")

The older the owner, the lower the average number of claims. That makes sense, since younger drivers may drive more recklessly than older ones.

d)

# Five-number summary (plus mean) of the vehicle value.
summary(dataCar$veh_value)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.010   1.500   1.777   2.150  34.560

# Average number of claims within each of four equally sized vehicle-value
# bins (1 = lowest values, 4 = highest values).
# NOTE(review): ntile() buckets by rank, so rows with identical veh_value can
# land in neighbouring bins -- confirm this is acceptable for the exercise.
# The name `plot` is kept because a later statement in this file uses it.
plot <- dataCar %>%
  mutate(veh_value_bin = ntile(veh_value, n = 4)) %>%
  group_by(veh_value_bin) %>%
  summarize(avg_claims = mean(numclaims)) %>%
  ggplot(mapping = aes(x = veh_value_bin, y = avg_claims)) +
  # The default point shape ignores `fill`; `colour` is the aesthetic that
  # actually paints the points navy.
  geom_point(colour = "navy")

plot

The higher the value of the car, the higher the average number of claims.

# Convert the static ggplot object into an interactive plotly widget.
ggplotly(plot)